library(RMySQL)
con <- dbConnect(MySQL(),host="192.168.3.139",
                 dbname="niek_160906",user="niek",password="FbWf8AKC")  
dbSendQuery(con,'set names utf8')
# qd_price<-dbGetQuery(con,"select * from ha_price where city_code='qd'")
qd_info<-dbGetQuery(con,"select * from ha_info where city_code='qd'")
qd_bldg<-dbGetQuery(con,"select * from ha_bldg where city_code='qd'")
qd_phase<-dbGetQuery(con,"select * from ha_phase where city_code='qd'")
# qd_hushu<-dbGetQuery(con,"select * from ha_hushu where city_code='qd'")
# qd_gps<-dbGetQuery(con,"select * from ha_gps where city_code='qd'")
# qd_poi<-dbGetQuery(con,"select * from ha_poi where city_code='qd'")
# qd_poi_gps<-dbGetQuery(con,"select * from ha_poi_gps where city_code='qd'")

#抽取青岛住宅出售数据
qd_price_sale11<-dbGetQuery(con,"select city_code,ymid,ha_code,proptype,saleprice,salebldgarea,salecount 
                      from ha_price where city_code='qd' and proptype=11")
#删除缺失行
qd_price_sale11<-na.omit(qd_price_sale11) 
#融合info数据
qd_pi_sale11<-merge(qd_price_sale11,qd_info[,c('ha_code','volume_rate','greening_rate')],by='ha_code',all.x=TRUE)

#融合bldg数据
library(nnet)
qd_bldg2<-cbind(qd_bldg,class.ind(qd_bldg$bldg_type))
qd_pib_sale11<-merge(qd_pi_sale11,qd_bldg2[,c(-1,-3,-4)],by='ha_code',all.x=TRUE)

#融合phase数据
qd_pibp_sale11<-merge(qd_pib_sale11,qd_phase[,2:3],by='ha_code',all.x=TRUE)


#融合poi数据

#POI分类关系
# poi_table<-dbGetQuery(con,"select ha_cl_name,ha_cl_name_child from ha_poi group by ha_cl_name,ha_cl_name_child")

#探索缺失值
# library(mice)
# md.pattern(data)

#计数
#sum(table(unique(qd_pi_sale11$ha_code)))

#处理缺失值：随机森林迭代法
# library(missForest)
# data.full<-missForest(data)$ximp

#建模
date<-sort(unique(qd_pibp_sale11$ymid))
len<-length(date)
result<-as.data.frame(cbind(dt<-rep(0,len),fit<-rep(0,len),r2<-rep(0,len)))
names(result)<-c('dt','fit','r2')
for(i in 1:len){
    variable<-names(qd_pibp_sale11) %in% c('ha_code','city_code','proptype','salecount')
    qd_sale11<-qd_pibp_sale11[!variable]
    qd_sale11.new<-subset(qd_sale11,ymid==date[i])
    qd_fit<-lm(saleprice~.,data = qd_sale11.new[,names(qd_sale11.new)!='ymid'])
    qd_fit.sum<-summary(qd_fit)
    qd_fit.squ<-qd_fit.sum$r.squared
    qd_fit.coe<-qd_fit$coefficients
    result[i,1]<-date[i]
    result[i,2]<-qd_fit.coe[[1]]
    result[i,3]<-qd_fit.squ
}


